/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the
 * Free Software Foundation, Inc.,
 * 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
package org.jab.docsearch.converters;

import java.io.FileInputStream;
import java.io.IOException;

import org.apache.commons.io.IOUtils;
import org.apache.log4j.Layout;
import org.apache.poi.POIXMLProperties.CoreProperties;
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.hslf.extractor.PowerPointExtractor;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
import org.jab.docsearch.utils.FileUtils;

/**
 * Converter for MS PowerPoint files.
 * <p>
 * Files whose extension is {@code pptx} or {@code ppsx} are read with the
 * POI OOXML extractor; every other file is handed to the POI OLE2 extractor.
 * The extracted author, title, keywords and text are stored in the
 * {@code documentAuthor}, {@code documentTitle}, {@code documentKeywords} and
 * {@code documentText} fields, which are not declared in this class
 * (presumably inherited from {@link AbstractConverter}).
 *
 * @version $Id: PowerPoint.java 200 2016-12-05 16:55:14Z henschel $
 */
public class PowerPoint
        extends AbstractConverter
        implements ConverterInterface {

    /** Path of the PowerPoint file to convert; may be {@code null} until {@link #parse()} rejects it. */
    private final String filename;


    /**
     * Constructor
     *
     * @param filename the PowerPoint file name
     */
    public PowerPoint(String filename) {
        this.filename = filename;
    }


    /**
     * Extracts metadata and text from the PowerPoint file.
     *
     * @throws ConverterException  if the file name is {@code null} or the
     *                             file cannot be read or parsed
     * @see ConverterInterface#parse()
     */
    @Override
    public void parse()
            throws ConverterException {
        if (filename == null) {
            log.error("parse() filename is null");
            throw new ConverterException("PowerPoint::parse() filename is null");
        }

        // choose the extractor by file extension
        String fileExt = FileUtils.getFileExtension(filename);

        // Compare case-insensitively so that e.g. "SLIDES.PPTX" is not sent to
        // the OLE extractor (harmless if getFileExtension already lower-cases).
        if ("pptx".equalsIgnoreCase(fileExt) || "ppsx".equalsIgnoreCase(fileExt)) {
            // PowerPoint OOXML
            parseOOXML();
        }
        else {
            // PowerPoint OLE
            parseOLE();
        }

        if (log.isDebugEnabled()) {
            log.debug("parse() PowerPoint file='" + filename + '\'' + Layout.LINE_SEP +
                    "title='" + documentTitle + '\'' + Layout.LINE_SEP +
                    "author='" + documentAuthor + '\'' + Layout.LINE_SEP +
                    "keywords='" + documentKeywords + '\'');
        }
    }


    /**
     * Parse file with PowerPoint OLE Extractor
     *
     * @throws ConverterException  Converter problem
     */
    private void parseOLE()
            throws ConverterException {

        // get metadata and text
        FileInputStream fin = null;
        PowerPointExtractor we = null;
        try {
            fin = new FileInputStream(filename);

            we = new PowerPointExtractor(fin);

            // get meta data; the summary information can be absent, in which
            // case the metadata fields are left untouched and only the text
            // is extracted instead of failing the whole conversion
            SummaryInformation si = we.getSummaryInformation();
            if (si != null) {
                documentAuthor   = si.getAuthor();
                documentTitle    = si.getTitle();
                documentKeywords = si.getKeywords();
            }

            // get text
            documentText = we.getText();
        }
        catch (IOException ioe) {
            log.error("parse() failed at PowerPoint file=" + filename, ioe);
            throw new ConverterException("PowerPoint::parse() failed at PowerPoint file=" + filename, ioe);
        }
        catch (Exception e) {
            // the extractor may also fail with non-I/O exceptions
            log.error("parse() failed at PowerPoint file=" + filename, e);
            throw new ConverterException("PowerPoint::parse() failed at PowerPoint file=" + filename, e);
        }
        finally {
            // single place where stream and extractor are closed
            IOUtils.closeQuietly(fin, we);
        }
    }


    /**
     * Parse file with PowerPoint OOXML Extractor
     *
     * @throws ConverterException  Converter problem
     */
    private void parseOOXML()
            throws ConverterException {

        // get metadata and text
        FileInputStream fin = null;
        XSLFPowerPointExtractor xwe = null;
        try {
            fin = new FileInputStream(filename);

            xwe = new XSLFPowerPointExtractor(OPCPackage.open(fin));

            // get meta data
            CoreProperties cp = xwe.getCoreProperties();

            documentAuthor   = cp.getCreator();
            documentTitle    = cp.getTitle();
            documentKeywords = cp.getKeywords();

            // get text
            documentText = xwe.getText();
        }
        catch (IOException ioe) {
            log.error("parse() failed at PowerPoint file=" + filename, ioe);
            throw new ConverterException("PowerPoint::parse() failed at PowerPoint file=" + filename, ioe);
        }
        catch (Exception e) {
            // the extractor may also fail with non-I/O exceptions
            log.error("parse() failed at PowerPoint file=" + filename, e);
            throw new ConverterException("PowerPoint::parse() failed at PowerPoint file=" + filename, e);
        }
        finally {
            IOUtils.closeQuietly(fin, xwe);
        }
    }
}
